import requests
from lxml import etree
# XPath syntax: select an exact tag by attribute value: //a[@attribute="value"]
#


# Page to scrape.
url = 'http://news.baidu.com/'
# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}
# requests applies no timeout by default, so an unresponsive server would
# block the script forever; bound the wait explicitly.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were news.
response.raise_for_status()
# bytes.decode() with no argument decodes as UTF-8.
data = response.content.decode()
# Parse the markup into an lxml element tree that supports .xpath() queries.
xpath_data = etree.HTML(data)

# One round of output is printed per <div class="hotnews"> found in the page.
results = xpath_data.xpath('//div[@class="hotnews"]')

# NOTE: every expression below starts with "//", i.e. it is an absolute path.
# lxml evaluates absolute paths against the whole document even when .xpath()
# is called on a sub-element, so these queries return the same lists for every
# hotnews div. They are therefore evaluated once, on the document, rather than
# once per loop iteration. To scope a query to a single div, a relative path
# (".//...") called on that div would be needed instead.
hrefs = xpath_data.xpath('//ul[@class="ulist focuslistnews"]/li/a/@href')  # link targets; not used below
texts = xpath_data.xpath('//ul[@class="ulist focuslistnews"]/li/a/text()')  # link captions
# Alternative kept for reference: '//*[@id="pane-news"]/ul/li/a/following-sibling::*/text()'
# selects the text of the sibling nodes that follow each <a> (same level).
text = xpath_data.xpath('//*[@id="pane-news"]/ul/li/a/text()')
li = xpath_data.xpath('//*[@id="pane-news"]/ul/li/a')  # the <a> elements themselves; not used below

for result in results:
    # Same output as before: the pane-news captions, then the caption count.
    print(text)
    print(len(texts))
#